APPENDIX 



/* final_x.c - final version of program */ 
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NO_SEQ 500 /* max number of sequences */

#define NO_AA_SEG 50 /* max number of 10-residue segments per sequence (max length / 10) */

char garbage[40]; /* scratch buffer: holds the most recently read token */
FILE *fp, *fo;    /* fp: stream ReadIn_Garbage reads from; fo: output stream used by main */

/*
 * ReadIn_Garbage - read whitespace-delimited tokens from fp and discard them.
 *
 * y: number of tokens to consume.
 *
 * Every token is stored in the global `garbage`, so on return it holds the
 * last token that was read successfully.  Tokens longer than the buffer are
 * split at 39 characters instead of overrunning it, and reading stops early
 * once the stream is exhausted (in which case `garbage` is left unchanged).
 */
void ReadIn_Garbage(int y) {
    int x;

    for (x = 0; x < y; x++)
        if (fscanf(fp, "%39s", garbage) != 1)
            break;
}



double factorial ( float number) { 

/* Good for values less than 170 */ 

float gamma[101]-{1.000 / 99.433, 49.442, 32.785, 24.461, 



19.470, 16.146, 
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double value; 
float t; 
int y; 

if (number > 20.0) 

value = exp (number* log (number ) -number + 
0.5*log(2*3.14159265*number) ) ; 

else if (number == 0.0) 
value = 1.0; 

else { 

value = 1.0; 
t = number; 
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while (t > 1.0) { 

value = value * t; 
t=t-1.0; 

} 

y=t*100; 

value = value * gamma [y] * t; 

} 

return (value) ; 

} 



/* Defined earlier in this file; repeated so the helpers below can call it. */
double factorial(float number);

/* Total that every 26-letter count vector is rescaled to before comparison. */
static const double calc_value_scale = 274.0;

/* Rescale one count so that the whole vector sums to calc_value_scale. */
static float calc_value_rescale(int count, double total)
{
    if (total != 0)
        return calc_value_scale * count / total;
    return 0.0;
}

/*
 * ln(v!) term of the energy.  Contributes nothing for v <= 0.01; above 170,
 * where factorial() would overflow, the leading Stirling terms v ln v - v
 * are used instead.
 */
static double calc_value_log_fact(double v)
{
    if (!(v > 0.01))
        return 0.0;
    if (v > 170.0)
        return v * log(v) - v;
    return log(factorial(v));
}

/*
 * Energy difference between a "root" and a "fix" rescaled count for one
 * letter whose background frequency is `mean`.  Letters with a background
 * frequency of 0.001 or less contribute 0.
 */
static double calc_value_energy(float th_root, float th_fix, float mean)
{
    double e = 0.0;

    if (mean > 0.001) {
        e = e + (th_root - th_fix) * log(mean) +
                (th_fix - th_root) * log(1 - mean);
        e = e + calc_value_log_fact(th_fix);
        e = e - calc_value_log_fact(th_root);
        e = e + calc_value_log_fact(calc_value_scale - th_fix);
        e = e - calc_value_log_fact(calc_value_scale - th_root);
    }
    return e;
}

/*
 * Calc_Value - function that calculates K1.
 *
 * root, fix:           26 per-letter ('A'..'Z') counts each; the caller in
 *                      this file passes the counts at one alignment position
 *                      for all sequences (root) and for the selected
 *                      sequences (fix).
 * root_rand, fix_rand: the matching 26-entry counts summed over the whole
 *                      alignment, used as the reference ("random") part.
 *
 * For every letter the root/fix energy difference is computed for both the
 * regular and the random vectors; K1 is the square root of the sum over
 * letters of (regular - random)^2.
 */
double Calc_Value(int *root, int *fix, int *root_rand, int *fix_rand)
{
    /* Background frequency of each letter 'A'..'Z'. */
    static const float mean[26] = {
        0.072658, 0.000114, 0.024692, 0.050007, 0.061087,
        0.041774, 0.071589, 0.023392, 0.052691, 0.000000, 0.063923,
        0.089093, 0.023150, 0.042931, 0.000000, 0.052228, 0.039871,
        0.052012, 0.073087, 0.055606, 0.000000, 0.063321, 0.012720,
        0.000995, 0.032955, 0.000103
    };
    double K1 = 0.0, dEnergy, dEnergy_rand;
    double total_fix = 0, total_root = 0;
    double total_fix_rand = 0, total_root_rand = 0;
    int i;

    for (i = 0; i < 26; i++) {
        total_root += root[i];
        total_fix += fix[i];
        total_root_rand += root_rand[i];
        total_fix_rand += fix_rand[i];
    }

    for (i = 0; i < 26; i++) {
        /* Calculates Regular Part */
        dEnergy = calc_value_energy(calc_value_rescale(root[i], total_root),
                                    calc_value_rescale(fix[i], total_fix),
                                    mean[i]);

        /* Calculates Random Part */
        dEnergy_rand = calc_value_energy(
            calc_value_rescale(root_rand[i], total_root_rand),
            calc_value_rescale(fix_rand[i], total_fix_rand),
            mean[i]);

        dEnergy = dEnergy - dEnergy_rand;
        K1 += dEnergy * dEnergy;
    }

    return sqrt(K1);
}



main ( ) 
{ 

FILE *fs, *ft, *fh; 

int atom_no [3000] , aa_no[3000]; 

float pos_x[3000] , pos_y[3000], pos_z[3000], occup[3000]; 
float B_fac[3000] ; 

char atom[3000] [4] , aa[3000][4], chain [3000] [2] ; 
int t=0; 

double mean_val [NO_SEQ] ; 

int temp; 
double Kl; 

int aacount [500] [27] ; 
int aacount_fix [500] [27] ; 
char seqname[500] [22] ; 

char seq[NO_SEQ] [NO_AA_SEG] [ 10] ; 

char datain [12] , dataout [12] ; 
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•ass* 



int no_rows; 

int aaname, aanum; 

int s, z, x, y=0; 

int pno=0, mien, len, fix, no; 

float shit; 
int sel, seqf lag [500] ; 
char tempo [10] ; 
int atom_pdb, aa_pdb; 
int aa_dist [26] ; 
int aa_dist_f ix [26] ; 
char sain [20] ; 
char pdbin [20] ; 
int before, after; 
int numbers; 

print f ( "Enter structure/alignment file: "); 
scanf { "%s", sain) ; 
fh=f open (sain, "r") ; 
fscanf(fh, "%s", pdbin); 

/* Sets Up I/O Files */ 

printf ( "Enter input filename: "); 
scanf ("%s M , datain) ; 
fp=f open (datain, "r") ; 
printf ("Enter outfile: "); 
scanf ( "%s", dataout) ; 
f o-f open (dataout , "w"); 

/* Reads in Header of msf file */ 

strcpy (garbage, "dun"); 
while (strcmp (garbage, "MSF:")) 
ReadIn_Garbage ( 1 ) ; 

fscanf{fp, "%d", &len) ; 

while (strcmp (garbage, "Name:")) 
ReadIn_Garbage ( 1 ) ; 

/* Calculates mien and no__rows from len */ 
mlen=len/10; 
if (mlen*10 != len) 
mlen++ ; 
no_rows=mlen/5 ; 
if (no_rows*5 != mien) 

no_rows++; 
printf ( "no_rows = %d ", no_rows) ; 

/* Reads in Sequence names */ 
no=0; 

while (strcmp (garbage, "//")){ 

f scanf (fp, "%s", seqname [no++] ) ; 
strcpy (garbage, "duh" ) ; 

while (strcmp (garbage, "Name:") && strcmp (garbage, "//")) 
ReadIn_Garbage ( 1 ) ; 

} 
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Reads sequence into array */ 
f scant" (fp, "%s", garbage); 
if (strcmp (garbage, "1")) 
numbers=0; 

else 

number s=l ; 

for (z=0; z<no_rows; z++) { 
if (numbers) 

ReadIn_Garbage ( 2 ) ; 
for (y=0; y<no; y++) { 

for (x=0; x<5 && 5*z+x < mien; x++) 

fscanf(fp, "%s", seq[y] [ (x)+5* (z) ] ) 
ReadIn_Garbage ( 1 ) ; 

} 

} 



/* Converts all lowercase to uppercase */ 

for (z=0; z<len; z++) 

for (y=0; y<no; y++) 

if (seq[y] [0] [z] >= 'a' && seq[y][0][z] <= 'z') 
seq[y] [0] [z]=seq[y] [0] [z] + (T - 'a'); 



/* Selection for/against amino acids at a position */ 
for (x=0; x<2; x++) { 
if (x) 

printf ( "Selection against amino acids (x when 



done) \n") 



else 

t=l; 
do { 



selection */ 



aanum) 



printf ( "Selection for amino acids (x when done) \n 



printf ( "Enter amino acid: "); 
scanf ( "%s" , tempc) ; 
aaname=tempc [0] ; 
if (aaname >= 'a' && aaname <= 
aaname = aaname + ( 'A' - 
if (aaname == ? X' && !x && t) 



' z' ) 
' a ' ) ; 



/* Finish that 



for (y=0; y<no; y++) 

seqf lag [y] =1; 
if (aaname !- 'X 1 ) { 

printf ( "Enter amino acid number: "); 
scanf ("%d", & aanum ) ; 
for (z=0; z<no; z++) 

if (seq[z] [0] [aanum-1] == aaname) 
seqf lag [z] =l-x; 

} 

fprintf (fo, "%d - AA=%c, AA#=%d\n", 1-x, aaname, 
t=0; 

} while (aaname != I X I ); 



1634701.1 



-46- 



fprintf (fo, "\n") ; 



/* Reads in pdb file */ 



f s=fopen (pdbin, "r") ; 
f t=f open (strcat (dataout , 



.pdb"), V]; 



m 



do{ 



x=0; 
do{ 



fscanf (f s, 


"%s", 


garbage) ; 


(strcmp (garbage, 


"ATOM" ) ) ; 


fscanf (f s, 


"%d", 


&atom no [x] ) ; 


fscanf ( f s, 


"%s", 


atomfx] ) ; 


fscanf (f s, 


"%s", 


aa [x] ) ; 


fscanf (fs, 


"%s", 


chain [x] ) ; 


fscanf {fs, 


"%d", 


&aa no [x] ) ; 


fscanf (f s, 


"%f", 


&pos_x [x] ) ; 


fscanf (f s, 


"%f ", 


&pos_y [x] ) / 


fscanf ( f s, 


"%f ", 


&pos z [x] ) / 


fscanf (f s, 


"If", 


Soccup [x] ) ; 


fscanf (f s, 


"%f", 


&B_fac[x++] ) ; 


fscanf (f s, 


"%s", 


garbage) ; 


(strcmp (garbage, 


"END" ) ) ; 



atom_pdb=x; 

aa pdb=aa no[x-l]-aa no[0]+l; 



/* Count amino acids/position */ 
for (x=0; x<len; x++) 

for (y=0; y<27; y++) { 
aac'ount [x] [y]=0; 
aacount_f ix [x] [y]=0; 

} 

for (x=0; x<mlen; x++) 

for (z=0; z<10; z++) 
for (y=0; y<no; y++) { 

if (seq[y] [x] [z] >= 'A' && seq[y][x][z] <= ! Z') 

aacount [x*10+z] [seq[y] [x] [z]- f A f ]++; 
else if (seq[y] [x] [z] '.') 

aacount [x*10+z] [26]++; 
if (seqflag[y] ) { 

if (seq[y] [x] [z] >= 'A' && seq[y][x][z] 

aacount_f ix [x*10+z] [seq[y] [x] [z]- 

else if (seq[y] [x] [z] == f .') 

aacount f ix [x*10+z] [26] ++; 



<= ' Z') 
•A']++; 



} 



temp=0; 
s=0; 

for (x=0; x<26; x++) { 
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aa_dist [x] -0; 

aa dist fix[x]=0; 



for (x=0; x<26; x++) 

for (y=0; y<len; y++) { 

aa_dist [x] +=aacount [y] [x] ; 
aa_dist_f ix [x] +=aacount_f ix [y] [x] ; 

) 



for (y=0; y<aa_pdb; y++) { 

if (s>before || y == 0) { 

fscanf(fh, "%d", &before) ; 
if (before I I y==0) 

fscanf(fh, "%d", &after) ; 

} 



if ( s == before) 
s = after; 



Kl=Calc_Value (aacount [s] , aacount_f ix [s ] , aa_dist, 
aa_dist_fix) /100.0; 

mean_val [y] =K1; 
s++; 

} 

/* Writes filenames to output file */ 
for (z=0; z<no_rows; z++) { 

for (y=0; y<no; y++) { 

if (seqflag[y] ) { 

fprintf (fo, "%-10s", seqname[y] ) ; 
for (x=0; x<5 && x+5*z!=mlen; x++) 
fprintf (fo, "%.10s " f seq[y] [ (x) + (5*z) ] ) 
fprintf (fo, "\n") ; 
pno++; 

} 

} 

fprintf (fo, "\n\n") ; 

} 

print f ( "no_rows = %d no_rows); 
pno=pno /no__rows ; 

/* Print AA Composition */ 
fix=l; 
do{ 

printf("\nAA Comp which position (0 to exit)? ") ; 
scanf ("%d", &fix) ; 
if (fix != 0) { 

for (x=0; x<26; x++) { 

shit=100*aacount_f ix [f ix-1] [x] /pno; 
printf("%c = %3d (%3.0f%%) x +'A', 

aacount_f ix [ f ix-1 ] [x] , shit); 

} 

shit=100*aacount_f ix [f ix-1] [26] /pno; 

printf(". = %3d {%3 . Of %%) \n", aacount_f ix [ f ix- 

1] [26], shit); 

} 
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.3:5=. 



# 



} while (fix != 0) ; 

temp=0; 
s=0; 

/* Writes out new PDB file with Kl */ 
fprintf (ft, "GRASP PDB FILE\n" ) ; 
fprintf (ft, " FORMAT NUMBER=3\n" ) ; 
for (y=0; y<atom_pdb; y++) { 
if (temp != aa_no[y]) 

Kl=mean_val [s++] / 
fprintf (ft, "ATOM %4d %-4s%s %s %3d %7.3f %7.3f \ 

%7.3f ", atom_no[y], atomfy], aa [y] , chain [y] , aa_no [y] , pos_x[y], 
pos_y[y], pos_z[y]); 

fprintf (ft," %5f\n", Kl) ; 
temp=aa_no [y] / 

} 

fprintf (ft, "END") ; 
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/* Creates an SA file that containes information about how to align 
the msf to the structure defined. This requires that there be no deleted
amino acids in the alignment or pdb. This assumes there will be x and
x+1 for each aa in the pdb. Starting position is based on the position
in the alignment that corresponds to the first position in the pdb file
*/

#include <stdio.h>

char garbage[40]; /* scratch buffer: holds the most recently read token */
FILE *fp, *fo;    /* fp: stream ReadIn_Garbage reads from; fo: output stream used by main */

/*
 * ReadIn_Garbage - read whitespace-delimited tokens from fp and discard them.
 *
 * y: number of tokens to consume.
 *
 * Every token is stored in the global `garbage`, so on return it holds the
 * last token that was read successfully.  Tokens longer than the buffer are
 * split at 39 characters instead of overrunning it, and reading stops early
 * once the stream is exhausted (in which case `garbage` is left unchanged).
 */
void ReadIn_Garbage(int y) {
    int x;

    for (x = 0; x < y; x++)
        if (fscanf(fp, "%39s", garbage) != 1)
            break;
}



main ( ) 
{ 

O FILE *fs, *ft; 

."g int atom_no [3000] , aa_no[3000]; 

^ float pos_x[3000] , pos_y[3000], pos_z[3000], occup[3000], 

B_fac[3000]; 

char atom[3000] [4] , aa[3000][4], chain [3000] [2] ; 
t int t; 

M char seqname [500] [22] ; 

m char seq[500] [50] [10] ; 

CP char datain [ 12] , dataout [12] ; 

£ char name [20] ="Name : " ; 

int s, z, x, y=0 ; 
int mien, len,no; 
g int i,j; 

lz int atom_pdb, aa_pdb; 

char searchname [22] ; 
h jf int startaa; 

M int nameno; 

int temp; 

int numbers; 

int no_rows; 

/* Sets Up I/O Files */ 

print f ( "Enter input alignment filename: ") ; 

scanf ("%s", datain) ; 

fp=f open (datain, "r") ; 
print f ( "Enter input PDB file: ") ; 
scanf ("%s", datain); 
f s=f open (datain, "r " ) ; 

print f ( "Enter corresponding name of structure to alignment: "); 
scanf ("%s", searchname) ; 

printf ( "Enter starting position on alignment: "); 
scanf ("%d", &startaa) ; 
printf ("Enter outfile: "); 

scanf ( "%s", dataout) ; 

f o=f open (dataout , "w") ; 
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Reads in Header */ 

strcpy (garbage, "duh") ; 
while (strcmp (garbage, "MSF : " ) ) 
ReadIn_Garbage ( 1 ) ; 

fscanf(fp, "%d", &len) ; 

while (strcmp (garbage, "Name:")) 
ReadIn_Garbage ( 1 ) ; 



4 



Calculates mien from len */ 
mlen=len/10; 
if (mlen*10 != len) 
mlen++; 
no_rows=mlen/5; 
if (no_rows*5 != no_rows) 
no_rows++; 

Reads in Sequence names */ 
no=0; 

while (strcmp (garbage, "//")){ 

f scanf ( fp, "%s", seqname [no++] ) ; 
strcpy (garbage, "duh") ; 

while (strcmp (garbage, "Name:") && strcmp (garbage, "//") 
ReadIn_Garbage ( 1 ) ; 

} 



/* Reads sequence into array */ 
fscanf(fp, "%s", garbage); 
if (strcmp (garbage, "1")) 
numbers=0; 

else 

numbers=l; 

for (z=0; z<no_rows; z++) { 
if (numbers) 

ReadIn_Garbage ( 2 ) ; 
for (y=0; y<no; y++) { 

for (x=0; x<5 && 5*z+x < mien; x++) { 

fscanf(fp, "%s", seq[y] [ <x)+5* (z) ] ) ; 

} 

ReadIn_Garbage ( 1 ) ; 

} 

} 



/* Converts all lowercase to uppercase */ 

for ( z=0; z<len; z++) 

for (y=0; y<no; y++) 

if (seq[y] [0] [z] f a' && seq[y] [0] [z] <= 1 z 1 ) 

seq[y] [0] [z]=seq[y] [0] [z]+{'A' - 'a'); 
else if (seq[y] [0] [z] < f A f || seq[y][0][z] > f Z f ) 
seq[y] [0] [z]=- . ! ; 
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/* Reads in pdb */ 



J3ES. 



CO 



en 



do{ 

fscanf (fs, "%s", garbage); 
} while (strcmp (garbage, "ATOM")); 

temp=0; 
x=0; 
do{ 



f scanf (f s, 


"%d", 


&atom_no [x] ) ; 


f scanf ( f s, 


"%s", 


atom [x] ) ; 


fscanf (f s, 


"%s", 


aa [x] ) ; 


fscanf (f s, 


"%s", 


chain [x] ) ; 


fscanf ( f s, 


"%d", 


&aa_no [x] ) , 




fscanf (f s, 


"%f ", 


&pos x [x] ) , 




fscanf (f s, 


"%f ", 


&pos_y [x] ) , 




fscanf (f s, 


"%f ", 


&pos_z [x] ) , 




fscanf (f s, 


"%f ", 


&occup [x] ) , 




fscanf (f s, 


"%f ", 


&B f ac [x] ) , 
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do{ 

fscanf (fs, "%s", garbage); 
} while (strcmp (garbage, "ATOM") && strcmp (garbage, "END")); 
if (temp != aa_no[x]) 
temp=aa_no [x++] ; 
} while (strcmp (garbage, "END")); 
atom_pdb=x; 

Finds corresponding index for alignment name */ 
nameno=-l ; 

for (i=0; i<no; i++) 

if ( ! strcmp (seqname [i] , searchname) ) 
nameno=i; 
if (nameno == -1) 

printf ( "Sequence not f ound !!!!!! \n" ) ; 

startaa — ; 

fprintf(fo, "%s\n", datain) ; 
if (startaa !=0) 

fprintf(fo, "0 %d\n", startaa); 

x=0; 

for (i=startaa; i<len I I x<atom_pdb; i++) 

if (seq[nameno] [0] [i] != f . f && aa_no[x+l] == aa_no[x]+l) 
x++; 

else if (aa_no[x+l] != aa_no[x]+l){ 
s=i; 
do { 

x++; 
i++; 

} while (aa_no[x+l] != aa_no[x]+l); 
t-i; 

if (x<atom_pdb) 

fprintf(fo, "%d %d\n", s,t); 

} 

else if (seq[nameno] [0] [i] == l . , ){ 
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s=i; 
do 

i++; 

while (seq[nameno] [0] [i] == 1 . 1 ) ; 
t=i; 

if (x<atom_pdb && t != len) 

fprintf(fo, "%d %d\n", s,t); 

} 

fprintf (fo, "000\n") ; 



/* Closes files */ 
f close ( f o) ; 
f close ( fp) ; 

fclose (fs) ; 

fclose (ft) / 



} 
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